In [1]:
!pip install pandas==2.3.0
Requirement already satisfied: pandas==2.3.0 in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0) Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (1.26.4) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2023.3) Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas==2.3.0) (1.16.0)
In [2]:
pip install numpy==1.26.4
Requirement already satisfied: numpy==1.26.4 in c:\users\aditya singh\anaconda3\lib\site-packages (1.26.4) Note: you may need to restart the kernel to use updated packages.
In [3]:
!pip install numpy==1.26.4 --upgrade --force-reinstall
Collecting numpy==1.26.4
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
Attempting uninstall: numpy
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
Successfully uninstalled numpy-1.26.4
Successfully installed numpy-1.26.4
In [4]:
!pip install matplotlib==3.10.3
!pip install seaborn==0.13.2
!pip install plotly==6.1.2
Requirement already satisfied: matplotlib==3.10.3 in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.26.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib==3.10.3) (1.16.0)
Requirement already satisfied: seaborn==0.13.2 in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (2.3.0)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.16.0)
Collecting plotly==6.1.2
Using cached plotly-6.1.2-py3-none-any.whl.metadata (6.9 kB)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (1.42.0)
Requirement already satisfied: packaging in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (24.1)
Using cached plotly-6.1.2-py3-none-any.whl (16.3 MB)
Installing collected packages: plotly
Attempting uninstall: plotly
Found existing installation: plotly 6.2.0
Uninstalling plotly-6.2.0:
Successfully uninstalled plotly-6.2.0
Successfully installed plotly-6.1.2
In [5]:
!pip install pandas==2.3.0
!pip install numpy==2.3.0
!pip install matplotlib==3.10.3
!pip install seaborn==0.13.2
!pip install plotly==6.1.2
Requirement already satisfied: pandas==2.3.0 in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0)
Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas==2.3.0) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas==2.3.0) (1.16.0)
Collecting numpy==2.3.0
Using cached numpy-2.3.0-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.3.0-cp312-cp312-win_amd64.whl (12.7 MB)
Installing collected packages: numpy
Attempting uninstall: numpy
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
Successfully uninstalled numpy-1.26.4
Successfully installed numpy-2.3.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.3.0 which is incompatible. gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.3.0 which is incompatible. numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.0 which is incompatible. scipy 1.13.1 requires numpy<2.3,>=1.22.4, but you have numpy 2.3.0 which is incompatible.
Requirement already satisfied: matplotlib==3.10.3 in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (1.4.4)
Requirement already satisfied: numpy>=1.23 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.3.0)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib==3.10.3) (2.9.0.post0)
Collecting numpy>=1.23 (from matplotlib==3.10.3)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib==3.10.3) (1.16.0)
Using cached numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)
Installing collected packages: numpy
Attempting uninstall: numpy
Found existing installation: numpy 2.3.0
Uninstalling numpy-2.3.0:
Successfully uninstalled numpy-2.3.0
Successfully installed numpy-1.26.4
Requirement already satisfied: seaborn==0.13.2 in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (1.26.4)
Requirement already satisfied: pandas>=1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (2.3.0)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\aditya singh\anaconda3\lib\site-packages (from seaborn==0.13.2) (3.10.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas>=1.2->seaborn==0.13.2) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn==0.13.2) (1.16.0)
Requirement already satisfied: plotly==6.1.2 in c:\users\aditya singh\anaconda3\lib\site-packages (6.1.2)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (1.42.0)
Requirement already satisfied: packaging in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly==6.1.2) (24.1)
In [6]:
!pip check
No broken requirements found.
In [7]:
pip install --upgrade pandas matplotlib seaborn plotly
Requirement already satisfied: pandas in c:\users\aditya singh\anaconda3\lib\site-packages (2.3.0)
Requirement already satisfied: matplotlib in c:\users\aditya singh\anaconda3\lib\site-packages (3.10.3)
Requirement already satisfied: seaborn in c:\users\aditya singh\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: plotly in c:\users\aditya singh\anaconda3\lib\site-packages (6.1.2)
Collecting plotly
Using cached plotly-6.2.0-py3-none-any.whl.metadata (8.5 kB)
Requirement already satisfied: numpy>=1.26.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\aditya singh\anaconda3\lib\site-packages (from pandas) (2023.3)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (24.1)
Requirement already satisfied: pillow>=8 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (10.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from matplotlib) (3.1.2)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\aditya singh\anaconda3\lib\site-packages (from plotly) (1.42.0)
Requirement already satisfied: six>=1.5 in c:\users\aditya singh\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
Using cached plotly-6.2.0-py3-none-any.whl (9.6 MB)
Installing collected packages: plotly
Attempting uninstall: plotly
Found existing installation: plotly 6.1.2
Uninstalling plotly-6.1.2:
Successfully uninstalled plotly-6.1.2
Successfully installed plotly-6.2.0
Note: you may need to restart the kernel to use updated packages.
In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from warnings import filterwarnings
filterwarnings('ignore')
In [9]:
print("\033[1mFile Uploading\033[0m")
File Uploading
In [10]:
ipl_data = pd.read_csv("IPL.csv")
In [11]:
ipl_data.head()
Out[11]:
| Unnamed: 0 | match_id | date | match_type | event_name | innings | batting_team | bowling_team | over | ball | ... | team_runs | team_balls | team_wicket | new_batter | batter_runs | batter_balls | bowler_wicket | batting_partners | next_batter | striker_out | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 131970 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 1 | ... | 1 | 1 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 1 | 131971 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 2 | ... | 1 | 2 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 2 | 131972 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 3 | ... | 2 | 2 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 3 | 131973 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 3 | ... | 2 | 3 | 0 | NaN | 0 | 2 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 4 | 131974 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 4 | ... | 2 | 4 | 0 | NaN | 0 | 3 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
5 rows × 64 columns
In [12]:
ipl_data.shape
Out[12]:
(278205, 64)
In [13]:
ipl_data.columns
Out[13]:
Index(['Unnamed: 0', 'match_id', 'date', 'match_type', 'event_name', 'innings',
'batting_team', 'bowling_team', 'over', 'ball', 'ball_no', 'batter',
'bat_pos', 'runs_batter', 'balls_faced', 'bowler', 'valid_ball',
'runs_extras', 'runs_total', 'runs_bowler', 'runs_not_boundary',
'extra_type', 'non_striker', 'non_striker_pos', 'wicket_kind',
'player_out', 'fielders', 'runs_target', 'review_batter',
'team_reviewed', 'review_decision', 'umpire', 'umpires_call',
'player_of_match', 'match_won_by', 'win_outcome', 'toss_winner',
'toss_decision', 'venue', 'city', 'day', 'month', 'year', 'season',
'gender', 'team_type', 'superover_winner', 'result_type', 'method',
'balls_per_over', 'overs', 'event_match_no', 'stage', 'match_number',
'team_runs', 'team_balls', 'team_wicket', 'new_batter', 'batter_runs',
'batter_balls', 'bowler_wicket', 'batting_partners', 'next_batter',
'striker_out'],
dtype='object')
In [14]:
print("\033[1mData Cleaning\033[0m")
Data Cleaning
In [15]:
ipl_data.head()
ipl_data.info()
ipl_data.describe()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 278205 entries, 0 to 278204 Data columns (total 64 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 278205 non-null int64 1 match_id 278205 non-null int64 2 date 278205 non-null object 3 match_type 278205 non-null object 4 event_name 278205 non-null object 5 innings 278205 non-null int64 6 batting_team 278205 non-null object 7 bowling_team 278205 non-null object 8 over 278205 non-null int64 9 ball 278205 non-null int64 10 ball_no 278205 non-null float64 11 batter 278205 non-null object 12 bat_pos 278205 non-null int64 13 runs_batter 278205 non-null int64 14 balls_faced 278205 non-null int64 15 bowler 278205 non-null object 16 valid_ball 278205 non-null int64 17 runs_extras 278205 non-null int64 18 runs_total 278205 non-null int64 19 runs_bowler 278205 non-null int64 20 runs_not_boundary 278205 non-null bool 21 extra_type 15133 non-null object 22 non_striker 278205 non-null object 23 non_striker_pos 278205 non-null int64 24 wicket_kind 13823 non-null object 25 player_out 13823 non-null object 26 fielders 10013 non-null object 27 runs_target 133903 non-null float64 28 review_batter 872 non-null object 29 team_reviewed 872 non-null object 30 review_decision 872 non-null object 31 umpire 872 non-null object 32 umpires_call 278205 non-null bool 33 player_of_match 278205 non-null object 34 match_won_by 278205 non-null object 35 win_outcome 273503 non-null object 36 toss_winner 278205 non-null object 37 toss_decision 278205 non-null object 38 venue 278205 non-null object 39 city 278205 non-null object 40 day 278205 non-null int64 41 month 278205 non-null int64 42 year 278205 non-null int64 43 season 278205 non-null object 44 gender 278205 non-null object 45 team_type 278205 non-null object 46 superover_winner 3896 non-null object 47 result_type 4702 non-null object 48 method 3890 non-null object 49 balls_per_over 278205 non-null int64 50 overs 278205 non-null int64 51 event_match_no 278205 non-null object 52 stage 278205 non-null object 53 match_number 278205 non-null object 54 team_runs 278205 non-null int64 55 team_balls 278205 non-null int64 56 team_wicket 278205 non-null int64 57 new_batter 13321 non-null object 58 batter_runs 278205 non-null int64 59 batter_balls 278205 non-null int64 60 bowler_wicket 278205 non-null int64 61 batting_partners 278205 non-null object 62 next_batter 13321 non-null object 63 striker_out 278205 non-null bool dtypes: bool(3), float64(2), int64(24), object(35) memory usage: 130.3+ MB
Out[15]:
| Unnamed: 0 | match_id | innings | over | ball | ball_no | bat_pos | runs_batter | balls_faced | valid_ball | ... | month | year | balls_per_over | overs | team_runs | team_balls | team_wicket | batter_runs | batter_balls | bowler_wicket | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 278205.000000 | 2.782050e+05 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | ... | 278205.000000 | 278205.000000 | 278205.0 | 278205.0 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 | 278205.000000 |
| mean | 139102.000000 | 9.422687e+05 | 1.482914 | 9.193839 | 3.488855 | 9.542725 | 3.612555 | 1.277378 | 0.967362 | 0.963182 | ... | 4.787933 | 2016.710178 | 6.0 | 20.0 | 77.110498 | 58.614637 | 2.456972 | 18.327424 | 14.011211 | 0.045470 |
| std | 80311.010157 | 3.817198e+05 | 0.502571 | 5.681511 | 1.708263 | 5.682938 | 2.168978 | 1.651107 | 0.177687 | 0.188315 | ... | 1.586724 | 5.248572 | 0.0 | 0.0 | 49.957873 | 34.117619 | 2.100374 | 18.578093 | 11.833930 | 0.208333 |
| min | 0.000000 | 3.359820e+05 | 1.000000 | 0.000000 | 1.000000 | 0.100000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.000000 | 2008.000000 | 6.0 | 20.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 69551.000000 | 5.483530e+05 | 1.000000 | 4.000000 | 2.000000 | 4.500000 | 2.000000 | 0.000000 | 1.000000 | 1.000000 | ... | 4.000000 | 2012.000000 | 6.0 | 20.0 | 36.000000 | 29.000000 | 1.000000 | 4.000000 | 5.000000 | 0.000000 |
| 50% | 139102.000000 | 1.082601e+06 | 1.000000 | 9.000000 | 3.000000 | 9.400000 | 3.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 4.000000 | 2017.000000 | 6.0 | 20.0 | 73.000000 | 58.000000 | 2.000000 | 12.000000 | 11.000000 | 0.000000 |
| 75% | 208653.000000 | 1.304049e+06 | 2.000000 | 14.000000 | 5.000000 | 14.400000 | 5.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 5.000000 | 2022.000000 | 6.0 | 20.0 | 113.000000 | 88.000000 | 4.000000 | 27.000000 | 20.000000 | 0.000000 |
| max | 278204.000000 | 1.485779e+06 | 6.000000 | 19.000000 | 7.000000 | 19.600000 | 11.000000 | 6.000000 | 1.000000 | 1.000000 | ... | 11.000000 | 2025.000000 | 6.0 | 20.0 | 287.000000 | 121.000000 | 10.000000 | 175.000000 | 73.000000 | 1.000000 |
8 rows × 26 columns
In [16]:
ipl_data.isnull().sum()
Out[16]:
Unnamed: 0 0
match_id 0
date 0
match_type 0
event_name 0
...
batter_balls 0
bowler_wicket 0
batting_partners 0
next_batter 264884
striker_out 0
Length: 64, dtype: int64
In [17]:
ipl_data.drop_duplicates(inplace=True)
In [18]:
ipl_data.reset_index(drop=True, inplace=True)
In [19]:
print("\033[1mData Overview\033[0m")
Data Overview
In [20]:
ipl_data.head(50)
Out[20]:
| Unnamed: 0 | match_id | date | match_type | event_name | innings | batting_team | bowling_team | over | ball | ... | team_runs | team_balls | team_wicket | new_batter | batter_runs | batter_balls | bowler_wicket | batting_partners | next_batter | striker_out | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 131970 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 1 | ... | 1 | 1 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 1 | 131971 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 2 | ... | 1 | 2 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 2 | 131972 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 3 | ... | 2 | 2 | 0 | NaN | 0 | 1 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 3 | 131973 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 3 | ... | 2 | 3 | 0 | NaN | 0 | 2 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 4 | 131974 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 4 | ... | 2 | 4 | 0 | NaN | 0 | 3 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 5 | 131975 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 5 | ... | 2 | 5 | 0 | NaN | 0 | 4 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 6 | 131976 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 0 | 6 | ... | 3 | 6 | 0 | NaN | 0 | 5 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 7 | 131977 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 1 | ... | 3 | 7 | 0 | NaN | 0 | 6 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 8 | 131978 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 2 | ... | 7 | 8 | 0 | NaN | 4 | 7 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 9 | 131979 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 3 | ... | 11 | 9 | 0 | NaN | 8 | 8 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 10 | 131980 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 4 | ... | 17 | 10 | 0 | NaN | 14 | 9 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 11 | 131981 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 5 | ... | 21 | 11 | 0 | NaN | 18 | 10 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 12 | 131982 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 1 | 6 | ... | 21 | 12 | 0 | NaN | 18 | 11 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 13 | 131983 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 1 | ... | 21 | 13 | 0 | NaN | 0 | 2 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 14 | 131984 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 2 | ... | 21 | 14 | 0 | NaN | 0 | 3 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 15 | 131985 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 3 | ... | 22 | 15 | 0 | NaN | 0 | 4 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 16 | 131986 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 4 | ... | 26 | 16 | 0 | NaN | 22 | 12 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 17 | 131987 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 5 | ... | 27 | 17 | 0 | NaN | 23 | 13 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 18 | 131988 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 2 | 6 | ... | 27 | 18 | 0 | NaN | 0 | 5 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 19 | 131989 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 1 | ... | 32 | 18 | 0 | NaN | 23 | 13 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 20 | 131990 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 1 | ... | 38 | 19 | 0 | NaN | 29 | 14 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 21 | 131991 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 2 | ... | 39 | 20 | 0 | NaN | 29 | 15 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 22 | 131992 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 3 | ... | 43 | 21 | 0 | NaN | 4 | 6 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 23 | 131993 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 4 | ... | 43 | 22 | 0 | NaN | 4 | 7 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 24 | 131994 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 5 | ... | 44 | 23 | 0 | NaN | 5 | 8 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 25 | 131995 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 3 | 6 | ... | 50 | 24 | 0 | NaN | 35 | 16 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 26 | 131996 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 1 | ... | 54 | 25 | 0 | NaN | 9 | 9 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 27 | 131997 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 2 | ... | 55 | 26 | 0 | NaN | 10 | 10 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 28 | 131998 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 3 | ... | 59 | 27 | 0 | NaN | 39 | 17 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 29 | 131999 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 4 | ... | 59 | 28 | 0 | NaN | 39 | 18 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 30 | 132000 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 5 | ... | 60 | 29 | 0 | NaN | 40 | 19 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 31 | 132001 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 4 | 6 | ... | 60 | 30 | 0 | NaN | 10 | 11 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 32 | 132002 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 1 | ... | 61 | 31 | 0 | NaN | 41 | 20 | 0 | ('BB McCullum', 'SC Ganguly') | NaN | False |
| 33 | 132003 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 2 | ... | 61 | 32 | 1 | NaN | 10 | 12 | 1 | ('BB McCullum', 'SC Ganguly') | RT Ponting | True |
| 34 | 132004 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 3 | ... | 61 | 33 | 1 | RT Ponting | 0 | 1 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 35 | 132005 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 4 | ... | 61 | 34 | 1 | NaN | 0 | 2 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 36 | 132006 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 5 | ... | 61 | 35 | 1 | NaN | 0 | 3 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 37 | 132007 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 5 | 6 | ... | 61 | 36 | 1 | NaN | 0 | 4 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 38 | 132008 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 1 | ... | 62 | 37 | 1 | NaN | 42 | 21 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 39 | 132009 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 2 | ... | 63 | 38 | 1 | NaN | 1 | 5 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 40 | 132010 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 3 | ... | 64 | 39 | 1 | NaN | 43 | 22 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 41 | 132011 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 4 | ... | 66 | 40 | 1 | NaN | 3 | 6 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 42 | 132012 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 5 | ... | 67 | 41 | 1 | NaN | 4 | 7 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 43 | 132013 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 6 | 6 | ... | 68 | 42 | 1 | NaN | 44 | 23 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 44 | 132014 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 1 | ... | 68 | 43 | 1 | NaN | 44 | 24 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 45 | 132015 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 2 | ... | 69 | 44 | 1 | NaN | 45 | 25 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 46 | 132016 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 3 | ... | 70 | 45 | 1 | NaN | 5 | 8 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 47 | 132017 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 4 | ... | 71 | 46 | 1 | NaN | 46 | 26 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 48 | 132018 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 5 | ... | 72 | 47 | 1 | NaN | 6 | 9 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
| 49 | 132019 | 335982 | 2008-04-18 | T20 | Indian Premier League | 1 | Kolkata Knight Riders | Royal Challengers Bangalore | 7 | 6 | ... | 73 | 48 | 1 | NaN | 47 | 27 | 0 | ('BB McCullum', 'RT Ponting') | NaN | False |
50 rows × 64 columns
In [21]:
ipl_data.shape
Out[21]:
(278205, 64)
In [22]:
ipl_data.columns
Out[22]:
Index(['Unnamed: 0', 'match_id', 'date', 'match_type', 'event_name', 'innings',
'batting_team', 'bowling_team', 'over', 'ball', 'ball_no', 'batter',
'bat_pos', 'runs_batter', 'balls_faced', 'bowler', 'valid_ball',
'runs_extras', 'runs_total', 'runs_bowler', 'runs_not_boundary',
'extra_type', 'non_striker', 'non_striker_pos', 'wicket_kind',
'player_out', 'fielders', 'runs_target', 'review_batter',
'team_reviewed', 'review_decision', 'umpire', 'umpires_call',
'player_of_match', 'match_won_by', 'win_outcome', 'toss_winner',
'toss_decision', 'venue', 'city', 'day', 'month', 'year', 'season',
'gender', 'team_type', 'superover_winner', 'result_type', 'method',
'balls_per_over', 'overs', 'event_match_no', 'stage', 'match_number',
'team_runs', 'team_balls', 'team_wicket', 'new_batter', 'batter_runs',
'batter_balls', 'bowler_wicket', 'batting_partners', 'next_batter',
'striker_out'],
dtype='object')
In [23]:
ipl_data['date'] = pd.to_datetime(ipl_data['date'], errors='coerce')
ipl_data['season'] = ipl_data['date'].dt.year
ipl_data = ipl_data[(ipl_data['season'] >= 2008) & (ipl_data['season'] <= 2025)]
In [24]:
ipl_data['season'].nunique()
Out[24]:
18
In [25]:
ipl_data['season'].unique()
Out[25]:
array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
2019, 2020, 2021, 2022, 2023, 2024, 2025])
In [26]:
ipl_data['season'].value_counts().to_frame().T
Out[26]:
| season | 2013 | 2022 | 2023 | 2012 | 2025 | 2024 | 2011 | 2020 | 2010 | 2021 | 2019 | 2014 | 2018 | 2016 | 2017 | 2015 | 2009 | 2008 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 18177 | 17912 | 17863 | 17767 | 17285 | 17103 | 17013 | 14559 | 14498 | 14425 | 14312 | 14300 | 14286 | 14096 | 13862 | 13652 | 13606 | 13489 |
In [27]:
print(ipl_data['season'].value_counts().sort_index())
season 2008 13489 2009 13606 2010 14498 2011 17013 2012 17767 2013 18177 2014 14300 2015 13652 2016 14096 2017 13862 2018 14286 2019 14312 2020 14559 2021 14425 2022 17912 2023 17863 2024 17103 2025 17285 Name: count, dtype: int64
In [28]:
balls_per_season = ipl_data['season'].value_counts().sort_index()
plt.figure(figsize=(12, 6))
plt.bar(balls_per_season.index, balls_per_season.values, color='skyblue')
plt.title('Total Balls Played Per Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Total Balls Played', fontsize=12)
plt.xticks(rotation=45)
for i, value in enumerate(balls_per_season.values):
plt.text(balls_per_season.index[i], value, str(value), ha='center', va='bottom', fontsize=9)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
In [29]:
unique_matches = ipl_data[['match_id', 'season']].drop_duplicates()
unique_matches['season'] = unique_matches['season'].astype(str).str.strip()
unique_matches = unique_matches[unique_matches['season'].str.isnumeric()]
unique_matches['season'] = unique_matches['season'].astype(int)
matches_per_season = unique_matches['season'].value_counts().sort_index()
print(matches_per_season)
season 2008 58 2009 57 2010 60 2011 73 2012 74 2013 76 2014 60 2015 59 2016 60 2017 59 2018 60 2019 60 2020 60 2021 60 2022 74 2023 74 2024 71 2025 74 Name: count, dtype: int64
In [30]:
matches_per_season = ipl_data[['match_id', 'season']].drop_duplicates()
matches_count = matches_per_season['season'].value_counts().sort_index()
plt.figure(figsize=(12, 6))
plt.bar(matches_count.index, matches_count.values, color='lightgreen')
plt.title('Total Matches Played Per Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.xticks(rotation=45)
for i, value in enumerate(matches_count.values):
plt.text(matches_count.index[i], value, str(value), ha='center', va='bottom', fontsize=9)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
In [31]:
matches_per_city = ipl_data[['match_id', 'city']].drop_duplicates()
city_match_counts = matches_per_city['city'].value_counts().sort_values(ascending=False)
print(city_match_counts)
city Mumbai 180 Kolkata 100 Delhi 97 Chennai 91 Hyderabad 83 Bangalore 65 Jaipur 64 Chandigarh 61 Unknown 51 Pune 51 Ahmedabad 45 Abu Dhabi 37 Bengaluru 34 Lucknow 22 Visakhapatnam 17 Dharamsala 15 Durban 15 Dubai 13 Centurion 12 Sharjah 10 Rajkot 10 Indore 9 Mohali 9 Navi Mumbai 9 Johannesburg 8 Cuttack 7 Port Elizabeth 7 Cape Town 7 Ranchi 7 Raipur 6 Guwahati 5 Kochi 5 Kanpur 4 Nagpur 3 East London 3 Kimberley 3 Bloemfontein 2 New Chandigarh 2 Name: count, dtype: int64
In [32]:
matches_per_city = ipl_data[['match_id', 'city']].drop_duplicates()
city_match_counts = matches_per_city['city'].value_counts()
city_match_counts_df = city_match_counts.reset_index()
city_match_counts_df.columns = ['City', 'Matches_Hosted']
plt.figure(figsize=(14, 10))
sns.barplot(x='Matches_Hosted', y='City', data=city_match_counts_df, palette='viridis')
plt.title('Total Matches Hosted by Each City (All Venues)', fontsize=16, fontweight='bold')
plt.xlabel('Number of Matches', fontsize=12)
plt.ylabel('City', fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [33]:
second_innings = ipl_data[ipl_data['innings'] == 2]
target_data = second_innings.groupby('match_id')['runs_total'].sum().reset_index()
target_data.columns = ['Match_ID', 'Target_Runs']
target_data.head()
Out[33]:
| Match_ID | Target_Runs | |
|---|---|---|
| 0 | 335982 | 82 |
| 1 | 335983 | 207 |
| 2 | 335984 | 132 |
| 3 | 335985 | 166 |
| 4 | 335986 | 112 |
In [34]:
plt.figure(figsize=(10, 6))
sns.histplot(target_data['Target_Runs'], bins=30, kde=True, color='orange')
plt.title('Distribution of Target Runs in IPL (2nd Innings)', fontsize=16, fontweight='bold')
plt.xlabel('Target Runs', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
In [35]:
second_innings = ipl_data[ipl_data['innings'] == 2]
target_by_match = second_innings.groupby(['match_id', 'season'])['runs_total'].sum().reset_index()
avg_target_by_season = target_by_match.groupby('season')['runs_total'].mean().reset_index()
avg_target_by_season.columns = ['Season', 'Avg_Target_Runs']
avg_target_by_season.head(18)
Out[35]:
| Season | Avg_Target_Runs | |
|---|---|---|
| 0 | 2008 | 148.293103 |
| 1 | 2009 | 136.052632 |
| 2 | 2010 | 149.616667 |
| 3 | 2011 | 139.319444 |
| 4 | 2012 | 145.878378 |
| 5 | 2013 | 140.697368 |
| 6 | 2014 | 152.083333 |
| 7 | 2015 | 146.948276 |
| 8 | 2016 | 151.766667 |
| 9 | 2017 | 152.338983 |
| 10 | 2018 | 159.216667 |
| 11 | 2019 | 156.600000 |
| 12 | 2020 | 153.033333 |
| 13 | 2021 | 151.050000 |
| 14 | 2022 | 158.540541 |
| 15 | 2023 | 166.657534 |
| 16 | 2024 | 176.197183 |
| 17 | 2025 | 174.013889 |
In [36]:
plt.figure(figsize=(12, 6))
sns.lineplot(data=avg_target_by_season, x='Season', y='Avg_Target_Runs', marker='o', color='crimson')
plt.title('Trend of Average Target Runs by Season', fontsize=16, fontweight='bold')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Average Target Runs', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)
# Annotate values on the line
for i, row in avg_target_by_season.iterrows():
plt.text(row['Season'], row['Avg_Target_Runs'] + 1, f"{row['Avg_Target_Runs']:.0f}",
ha='center', va='bottom', fontsize=9, color='black')
plt.tight_layout()
plt.show()
In [37]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df = matches_df.dropna(subset=['win_outcome'])
matches_df['win_outcome'] = matches_df['win_outcome'].astype(str).str.strip().str.lower()
matches_df['win_type'] = matches_df['win_outcome'].apply(
lambda x: 'runs' if 'runs' in x else ('wickets' if 'wickets' in x else None)
)
matches_df['win_margin'] = matches_df['win_outcome'].str.extract(r'(\d+)')
matches_df['win_margin'] = pd.to_numeric(matches_df['win_margin'], errors='coerce')
won_by_runs = matches_df[matches_df['win_type'] == 'runs'][['season', 'match_id', 'win_margin']].copy()
won_by_wickets = matches_df[matches_df['win_type'] == 'wickets'][['season', 'match_id', 'win_margin']].copy()
won_by_runs.rename(columns={'win_margin': 'Win_Margin_Runs'}, inplace=True)
won_by_wickets.rename(columns={'win_margin': 'Win_Margin_Wickets'}, inplace=True)
print("🏏 Won by Runs Sample:\n", won_by_runs.head())
print("🏏 Won by Wickets Sample:\n", won_by_wickets.head())
🏏 Won by Runs Sample:
season match_id Win_Margin_Runs
0 2008 335982 140
225 2008 335983 33
1624 2008 335989 6
2127 2008 335991 66
3289 2008 335996 13
🏏 Won by Wickets Sample:
season match_id Win_Margin_Wickets
473 2008 335984 9
692 2008 335985 5
938 2008 335986 5
1178 2008 335987 6
1419 2008 335988 9
In [38]:
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
sns.histplot(won_by_runs['Win_Margin_Runs'], bins=30, kde=True, color='orangered')
plt.title('Distribution of Result Margin - Won by Runs', fontsize=16)
plt.xlabel('Win Margin (Runs)', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()
In [39]:
plt.figure(figsize=(10, 6))
sns.countplot(x='Win_Margin_Wickets', data=won_by_wickets, palette='Blues')
plt.title('Distribution of Result Margin - Won by Wickets', fontsize=16)
plt.xlabel('Win Margin (Wickets)', fontsize=12)
plt.ylabel('Number of Matches', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()
In [40]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df['win_outcome'] = matches_df['win_outcome'].astype(str).str.strip().str.lower()
def extract_margin_type(value):
try:
if pd.notna(value) and ('runs' in value or 'wickets' in value):
parts = value.split()
return pd.Series([int(parts[0]), parts[1]])
except:
return pd.Series([None, None])
return pd.Series([None, None])
matches_df[['margin', 'margin_type']] = matches_df['win_outcome'].apply(extract_margin_type)
matches_df = matches_df.dropna(subset=['margin'])
matches_df['margin'] = matches_df['margin'].astype(int)
In [41]:
avg_margin_by_season = matches_df.groupby(['season', 'margin_type'])['margin'].mean().reset_index()
avg_margin_by_season.columns = ['Season', 'Margin_Type', 'Average_Margin']
avg_margin_by_season.head(36)
Out[41]:
| Season | Margin_Type | Average_Margin | |
|---|---|---|---|
| 0 | 2008 | runs | 29.375000 |
| 1 | 2008 | wickets | 6.500000 |
| 2 | 2009 | runs | 28.296296 |
| 3 | 2009 | wickets | 6.206897 |
| 4 | 2010 | runs | 31.483871 |
| 5 | 2010 | wickets | 6.785714 |
| 6 | 2011 | runs | 33.272727 |
| 7 | 2011 | wickets | 6.794872 |
| 8 | 2012 | runs | 28.235294 |
| 9 | 2012 | wickets | 6.025000 |
| 10 | 2013 | runs | 33.540541 |
| 11 | 2013 | wickets | 6.135135 |
| 12 | 2014 | runs | 29.272727 |
| 13 | 2014 | wickets | 6.081081 |
| 14 | 2015 | runs | 26.562500 |
| 15 | 2015 | wickets | 6.166667 |
| 16 | 2016 | runs | 32.190476 |
| 17 | 2016 | wickets | 6.256410 |
| 18 | 2017 | runs | 30.307692 |
| 19 | 2017 | wickets | 6.375000 |
| 20 | 2018 | runs | 24.107143 |
| 21 | 2018 | wickets | 5.812500 |
| 22 | 2019 | runs | 30.227273 |
| 23 | 2019 | wickets | 5.771429 |
| 24 | 2020 | runs | 39.370370 |
| 25 | 2020 | wickets | 6.965517 |
| 26 | 2021 | runs | 26.454545 |
| 27 | 2021 | wickets | 5.918919 |
| 28 | 2022 | runs | 27.945946 |
| 29 | 2022 | wickets | 6.000000 |
| 30 | 2023 | runs | 30.400000 |
| 31 | 2023 | wickets | 5.727273 |
| 32 | 2024 | runs | 30.142857 |
| 33 | 2024 | wickets | 5.944444 |
| 34 | 2025 | runs | 33.181818 |
| 35 | 2025 | wickets | 6.324324 |
In [42]:
plt.figure(figsize=(12, 6))
sns.lineplot(data=avg_margin_by_season, x='Season', y='Average_Margin', hue='Margin_Type', marker='o')
plt.title('Average Match Win Margin by Season (Runs vs Wickets)', fontsize=14)
plt.xlabel('Season')
plt.ylabel('Average Win Margin')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend(title='Margin Type')
plt.tight_layout()
plt.show()
In [43]:
matches_df = ipl_data.drop_duplicates(subset='match_id')
matches_df = matches_df.dropna(subset=['toss_winner', 'toss_decision', 'match_won_by'])
matches_df['toss_decision'] = matches_df['toss_decision'].astype(str).str.strip().str.lower()
matches_df['won_after_toss'] = matches_df['toss_winner'] == matches_df['match_won_by']
toss_win_stats = matches_df.groupby('toss_decision')['won_after_toss'].value_counts(normalize=True).unstack().fillna(0) * 100
toss_win_stats.columns = ['Lost Match', 'Won Match']
toss_win_stats = toss_win_stats.reset_index()
toss_win_stats
Out[43]:
| toss_decision | Lost Match | Won Match | |
|---|---|---|---|
| 0 | bat | 54.814815 | 45.185185 |
| 1 | field | 46.596859 | 53.403141 |
In [44]:
toss_win_stats_melted = toss_win_stats.melt(id_vars='toss_decision', value_vars=['Won Match', 'Lost Match'],
var_name='Match Outcome', value_name='Percentage')
plt.figure(figsize=(10, 6))
sns.barplot(data=toss_win_stats_melted, x='toss_decision', y='Percentage', hue='Match Outcome')
plt.title('Percentage of Matches Won Based on Toss Decision', fontsize=14)
plt.xlabel('Toss Decision')
plt.ylabel('Percentage (%)')
plt.grid(axis='y')
plt.tight_layout()
plt.show()
In [45]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
super_over_df = df[df['superover_winner'].notna()]
super_over_matches = super_over_df.drop_duplicates(subset=['match_id'])
def clean_season(season):
if isinstance(season, str) and '/' in season:
return int(season.split('/')[0]) # Take first part before '/'
return int(season)
super_over_matches['season'] = super_over_matches['season'].apply(clean_season)
super_over_by_season = super_over_matches.groupby('season')['match_id'].nunique().reset_index()
super_over_by_season.columns = ['Season', 'Super_Over_Matches']
super_over_by_season = super_over_by_season.sort_values(by='Season').reset_index(drop=True)
print(super_over_by_season)
Season Super_Over_Matches 0 2009 2 1 2013 2 2 2014 1 3 2015 1 4 2017 1 5 2019 2 6 2020 4 7 2021 1 8 2025 1
In [46]:
plt.figure(figsize=(10, 6))
sns.barplot(data=super_over_by_season, x='Season', y='Super_Over_Matches', palette='mako')
plt.title('Super Over Matches by IPL Season', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Super Over Matches')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [47]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
umpire_df = df[['match_id', 'umpire', 'umpires_call']].drop_duplicates()
umpire_list = pd.concat([
umpire_df[['match_id', 'umpire']].rename(columns={'umpire': 'Umpire'}),
umpire_df[['match_id', 'umpires_call']].rename(columns={'umpires_call': 'Umpire'})
])
umpire_list = umpire_list.dropna()
umpire_list['Umpire'] = umpire_list['Umpire'].astype(str).str.strip()
umpire_list = umpire_list[~umpire_list['Umpire'].isin(['True', 'False'])]
umpire_counts = umpire_list.groupby('Umpire')['match_id'].nunique().reset_index()
umpire_counts.columns = ['Umpire', 'Match_Count']
top_umpires = umpire_counts.sort_values(by='Match_Count', ascending=False).head(10).reset_index(drop=True)
print(top_umpires)
Umpire Match_Count 0 Nitin Menon 44 1 AK Chaudhary 43 2 KN Ananthapadmanabhan 38 3 CB Gaffaney 37 4 VK Sharma 35 5 UV Gandhe 26 6 YC Barde 23 7 J Madanagopal 23 8 R Pandit 20 9 MV Saidharshan Kumar 18
In [48]:
plt.figure(figsize=(10, 6))
sns.barplot(data=top_umpires, y='Umpire', x='Match_Count', palette='viridis')
plt.title('Top 10 IPL Umpires by Match Count (All Seasons)', fontsize=16)
plt.xlabel('Match Count')
plt.ylabel('Umpire')
plt.tight_layout()
plt.show()
In [49]:
print("Total matches in dataset:", df['match_id'].nunique())
with_umpire = df[df['umpire'].notna() | df['umpires_call'].notna()]
print("Matches with umpire info:", with_umpire['match_id'].nunique())
Total matches in dataset: 1169 Matches with umpire info: 1169
In [50]:
print(df['umpire'].dropna().unique())
print(df['umpires_call'].dropna().unique())
['CB Gaffaney' 'RJ Tucker' 'C Shamshuddin' 'A Deshmukh' 'VA Kulkarni' 'AK Chaudhary' 'CK Nandan' 'NJ Llong' 'S Ravi' 'Nitin Menon' 'KN Ananthapadmanabhan' 'A Nand Kishore' 'VK Sharma' 'YC Barde' 'M Erasmus' 'HDPK Dharmasena' 'BNJ Oxenford' 'AY Dandekar' 'UV Gandhe' 'IJ Gould' 'PR Reiffel' 'RK Illingworth' 'K Srinivasan' 'PG Pathak' 'J Madanagopal' 'Navdeep Singh' 'Tapan Sharma' 'HAS Khalid' 'MA Gough' 'N Pandit' 'R Pandit' 'Chirra Ravikanthreddy' 'NA Patwardhan' 'GR Sadashiv Iyer' 'Vinod Seshan' 'A Totre' 'MV Saidharshan Kumar' 'AG Wharf' 'Abhijit Bhattacharya' 'A Bengeri' 'AT Holdstock' 'K Swaroopanand' 'P Joshi' 'M Krishnadas' 'K Kelkar' 'KM Gandhi' 'Anish Sahasrabudhe'] [False True]
In [51]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
match_teams = df[['match_id', 'batting_team']].drop_duplicates()
matches_played = match_teams['batting_team'].value_counts().reset_index()
matches_played.columns = ['Team', 'Total_Matches']
matches = df.drop_duplicates(subset=['match_id'])
matches_won = matches['match_won_by'].value_counts().reset_index()
matches_won.columns = ['Team', 'Wins']
team_stats = pd.merge(matches_played, matches_won, on='Team', how='left')
team_stats['Wins'] = team_stats['Wins'].fillna(0).astype(int)
team_stats = team_stats.sort_values(by='Total_Matches', ascending=False).reset_index(drop=True)
print(team_stats)
Team Total_Matches Wins 0 Mumbai Indians 277 151 1 Kolkata Knight Riders 264 135 2 Chennai Super Kings 251 142 3 Royal Challengers Bangalore 240 114 4 Rajasthan Royals 234 114 5 Sunrisers Hyderabad 195 93 6 Kings XI Punjab 190 85 7 Delhi Daredevils 161 67 8 Delhi Capitals 105 51 9 Deccan Chargers 75 29 10 Punjab Kings 74 34 11 Gujarat Titans 60 37 12 Lucknow Super Giants 58 30 13 Pune Warriors 45 12 14 Gujarat Lions 30 13 15 Royal Challengers Bengaluru 30 18 16 Rising Pune Supergiant 16 10 17 Rising Pune Supergiants 14 5 18 Kochi Tuskers Kerala 14 6
In [52]:
plt.figure(figsize=(14, 8))
bar_width = 0.4
x = range(len(team_stats))
plt.bar(x, team_stats['Total_Matches'], width=bar_width, label='Total Matches', color='steelblue')
plt.bar([p + bar_width for p in x], team_stats['Wins'], width=bar_width, label='Wins', color='seagreen')
plt.xticks([p + bar_width / 2 for p in x], team_stats['Team'], rotation=60, ha='right')
plt.ylabel("Count")
plt.title("IPL Match Statistics: Total Matches Played vs Wins (All Teams)")
plt.legend()
plt.tight_layout()
plt.show()
In [53]:
team_stats['Win%'] = (team_stats['Wins'] / team_stats['Total_Matches'] * 100).round(2)
team_stats = team_stats.sort_values(by='Win%', ascending=False)
print(team_stats[['Team', 'Total_Matches', 'Wins', 'Win%']])
Team Total_Matches Wins Win% 16 Rising Pune Supergiant 16 10 62.50 11 Gujarat Titans 60 37 61.67 15 Royal Challengers Bengaluru 30 18 60.00 2 Chennai Super Kings 251 142 56.57 0 Mumbai Indians 277 151 54.51 12 Lucknow Super Giants 58 30 51.72 1 Kolkata Knight Riders 264 135 51.14 4 Rajasthan Royals 234 114 48.72 8 Delhi Capitals 105 51 48.57 5 Sunrisers Hyderabad 195 93 47.69 3 Royal Challengers Bangalore 240 114 47.50 10 Punjab Kings 74 34 45.95 6 Kings XI Punjab 190 85 44.74 14 Gujarat Lions 30 13 43.33 18 Kochi Tuskers Kerala 14 6 42.86 7 Delhi Daredevils 161 67 41.61 9 Deccan Chargers 75 29 38.67 17 Rising Pune Supergiants 14 5 35.71 13 Pune Warriors 45 12 26.67
In [54]:
team_stats_sorted = team_stats.sort_values(by='Win%', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=team_stats_sorted, x='Win%', y='Team', palette='coolwarm')
plt.title('IPL Teams by Win Percentage (All Seasons)', fontsize=16)
plt.xlabel('Win Percentage (%)')
plt.ylabel('Team')
plt.xlim(0, 100) # since it's a percentage
plt.tight_layout()
plt.show()
In [55]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
match_df = df.drop_duplicates(subset='match_id')
toss_wins = match_df['toss_winner'].value_counts().reset_index()
toss_wins.columns = ['Team', 'Toss_Wins']
toss_match_win = match_df[match_df['toss_winner'] == match_df['match_won_by']]
toss_match_wins = toss_match_win['toss_winner'].value_counts().reset_index()
toss_match_wins.columns = ['Team', 'Toss_Match_Wins']
toss_analysis = pd.merge(toss_wins, toss_match_wins, on='Team', how='left')
toss_analysis['Toss_Match_Wins'] = toss_analysis['Toss_Match_Wins'].fillna(0).astype(int)
toss_analysis['Toss_Success_%'] = (toss_analysis['Toss_Match_Wins'] / toss_analysis['Toss_Wins'] * 100).round(2)
toss_analysis = toss_analysis.sort_values(by='Toss_Success_%', ascending=False).reset_index(drop=True)
print(toss_analysis)
Team Toss_Wins Toss_Match_Wins Toss_Success_% 0 Rising Pune Supergiant 6 5 83.33 1 Royal Challengers Bengaluru 15 10 66.67 2 Gujarat Lions 15 10 66.67 3 Gujarat Titans 29 19 65.52 4 Chennai Super Kings 128 78 60.94 5 Kolkata Knight Riders 128 71 55.47 6 Mumbai Indians 151 82 54.30 7 Lucknow Super Giants 24 13 54.17 8 Delhi Capitals 58 29 50.00 9 Kochi Tuskers Kerala 8 4 50.00 10 Royal Challengers Bangalore 113 56 49.56 11 Rajasthan Royals 127 61 48.03 12 Punjab Kings 37 17 45.95 13 Deccan Chargers 43 19 44.19 14 Delhi Daredevils 80 35 43.75 15 Sunrisers Hyderabad 95 41 43.16 16 Rising Pune Supergiants 7 3 42.86 17 Kings XI Punjab 85 35 41.18 18 Pune Warriors 20 3 15.00
In [56]:
toss_plot = toss_analysis.sort_values(by='Toss_Success_%', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=toss_plot, x='Toss_Success_%', y='Team', palette='plasma')
plt.title('Toss to Match Win Success % by IPL Team', fontsize=16)
plt.xlabel('Toss Success %')
plt.ylabel('Team')
plt.xlim(0, 100)
plt.tight_layout()
plt.show()
In [57]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
match_df = df.drop_duplicates(subset='match_id')
matches_played = pd.concat([
match_df[['match_id', 'toss_winner']].rename(columns={'toss_winner': 'Team'}),
match_df[['match_id', 'match_won_by']].rename(columns={'match_won_by': 'Team'})
])
matches_played = matches_played.dropna()
total_matches = matches_played['Team'].value_counts().reset_index()
total_matches.columns = ['Team', 'Total_Matches']
toss_wins = match_df['toss_winner'].value_counts().reset_index()
toss_wins.columns = ['Team', 'Toss_Win']
toss_and_match_win = match_df[match_df['toss_winner'] == match_df['match_won_by']]
toss_match_win = toss_and_match_win['toss_winner'].value_counts().reset_index()
toss_match_win.columns = ['Team', 'Toss+Match_Win']
bat_first_win = match_df[
(match_df['toss_decision'] == 'bat') &
(match_df['toss_winner'] == match_df['match_won_by'])
]
bat_first_win_count = bat_first_win['toss_winner'].value_counts().reset_index()
bat_first_win_count.columns = ['Team', 'Bat_First_Win_After_Toss']
chase_win = match_df[
(match_df['toss_decision'] == 'field') &
(match_df['toss_winner'] == match_df['match_won_by'])
]
chase_win_count = chase_win['toss_winner'].value_counts().reset_index()
chase_win_count.columns = ['Team', 'Chasing_Win_After_Toss']
result = total_matches.merge(toss_wins, on='Team', how='left') \
.merge(toss_match_win, on='Team', how='left') \
.merge(bat_first_win_count, on='Team', how='left') \
.merge(chase_win_count, on='Team', how='left')
result = result.fillna(0).astype({'Toss_Win': 'int', 'Toss+Match_Win': 'int',
'Bat_First_Win_After_Toss': 'int',
'Chasing_Win_After_Toss': 'int'})
result = result.sort_values(by='Total_Matches', ascending=False).reset_index(drop=True)
print(result)
Team Total_Matches Toss_Win Toss+Match_Win \
0 Mumbai Indians 302 151 82
1 Chennai Super Kings 270 128 78
2 Kolkata Knight Riders 263 128 71
3 Rajasthan Royals 241 127 61
4 Royal Challengers Bangalore 227 113 56
5 Sunrisers Hyderabad 188 95 41
6 Kings XI Punjab 170 85 35
7 Delhi Daredevils 147 80 35
8 Delhi Capitals 109 58 29
9 Deccan Chargers 72 43 19
10 Punjab Kings 71 37 17
11 Gujarat Titans 66 29 19
12 Lucknow Super Giants 54 24 13
13 Royal Challengers Bengaluru 33 15 10
14 Pune Warriors 32 20 3
15 Gujarat Lions 28 15 10
16 Unknown 23 0 0
17 Rising Pune Supergiant 16 6 5
18 Kochi Tuskers Kerala 14 8 4
19 Rising Pune Supergiants 12 7 3
Bat_First_Win_After_Toss Chasing_Win_After_Toss
0 30 52
1 36 42
2 21 50
3 17 44
4 16 40
5 13 28
6 6 29
7 11 24
8 7 22
9 11 8
10 3 14
11 3 16
12 5 8
13 1 9
14 3 0
15 0 10
16 0 0
17 0 5
18 0 4
19 0 3
In [58]:
plot_data = result.head(10)
teams = plot_data['Team']
x = np.arange(len(teams))
width = 0.2
plt.figure(figsize=(14, 7))
plt.bar(x - width*1.5, plot_data['Toss_Win'], width, label='Toss Wins', color='skyblue')
plt.bar(x - width/2, plot_data['Toss+Match_Win'], width, label='Toss + Match Wins', color='limegreen')
plt.bar(x + width/2, plot_data['Bat_First_Win_After_Toss'], width, label='Bat First Wins', color='orange')
plt.bar(x + width*1.5, plot_data['Chasing_Win_After_Toss'], width, label='Chasing Wins', color='plum')
plt.xticks(x, teams, rotation=45, ha='right')
plt.ylabel('Match Count')
plt.title('IPL Team Strategy: Toss & Match Outcome Breakdown')
plt.legend()
plt.tight_layout()
plt.show()
In [59]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
team_scores = df.groupby(['match_id', 'batting_team'])['runs_total'].sum().reset_index()
highest_scores = team_scores.groupby('batting_team')['runs_total'].max().reset_index()
highest_scores.columns = ['Team', 'Highest_Score']
highest_scores = highest_scores.sort_values(by='Highest_Score', ascending=False).reset_index(drop=True)
print(highest_scores)
Team Highest_Score 0 Sunrisers Hyderabad 287 1 Kolkata Knight Riders 272 2 Royal Challengers Bangalore 263 3 Royal Challengers Bengaluru 262 4 Punjab Kings 262 5 Lucknow Super Giants 257 6 Delhi Capitals 257 7 Mumbai Indians 247 8 Chennai Super Kings 246 9 Rajasthan Royals 242 10 Gujarat Titans 233 11 Kings XI Punjab 232 12 Delhi Daredevils 231 13 Deccan Chargers 214 14 Gujarat Lions 208 15 Rising Pune Supergiants 195 16 Pune Warriors 192 17 Rising Pune Supergiant 187 18 Kochi Tuskers Kerala 184
In [60]:
plot_scores = highest_scores.sort_values(by='Highest_Score', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=plot_scores, x='Highest_Score', y='Team', palette='crest')
plt.title('Highest Run Score by Each IPL Team (All Seasons)', fontsize=16)
plt.xlabel('Highest Score (Runs)')
plt.ylabel('Team')
plt.tight_layout()
plt.show()
In [61]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
innings_score = df.groupby(['match_id', 'batting_team'])['runs_total'].sum().reset_index()
innings_score.columns = ['match_id', 'Team', 'Total_Runs']
high_scores = innings_score[innings_score['Total_Runs'] >= 200]
team_200plus = high_scores['Team'].value_counts().reset_index()
team_200plus.columns = ['Team', '200+ Scores']
team_200plus = team_200plus.sort_values(by='200+ Scores', ascending=False).reset_index(drop=True)
print(team_200plus)
Team 200+ Scores 0 Chennai Super Kings 35 1 Mumbai Indians 31 2 Kolkata Knight Riders 29 3 Rajasthan Royals 26 4 Sunrisers Hyderabad 26 5 Royal Challengers Bangalore 24 6 Punjab Kings 18 7 Gujarat Titans 16 8 Kings XI Punjab 15 9 Delhi Capitals 15 10 Lucknow Super Giants 13 11 Royal Challengers Bengaluru 10 12 Delhi Daredevils 5 13 Deccan Chargers 1 14 Gujarat Lions 1
In [62]:
plot_200 = team_200plus.sort_values(by='200+ Scores', ascending=True)
plt.figure(figsize=(12, 8))
sns.barplot(data=plot_200, x='200+ Scores', y='Team', palette='rocket')
plt.title('Number of 200+ Runs Innings by IPL Team', fontsize=16)
plt.xlabel('200+ Run Innings Count')
plt.ylabel('Team')
plt.tight_layout()
plt.show()
In [63]:
f = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
'2007/08': '2008',
'2009/10': '2010',
'2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})') # captures '2008' to '2099'
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']
high_scoring = innings_runs[innings_runs['Total_Runs'] >= 200]
season_200plus = high_scoring.groupby('Season')['match_id'].nunique().reset_index()
season_200plus.columns = ['Season', '200+ Run Innings']
season_200plus = season_200plus.sort_values(by='Season').reset_index(drop=True)
print(season_200plus)
Season 200+ Run Innings 0 2008 7 1 2009 1 2 2010 5 3 2011 4 4 2012 4 5 2013 4 6 2014 6 7 2015 8 8 2016 5 9 2017 8 10 2018 11 11 2019 9 12 2021 15 13 2022 13 14 2023 25 15 2024 27 16 2025 35
In [64]:
plt.figure(figsize=(12, 6))
sns.set_style("whitegrid")
sns.lineplot(data=season_200plus, x='Season', y='200+ Run Innings', marker='o', color='crimson', linewidth=2.5)
plt.title('🔥 Trend of 200+ Run Innings in IPL (2008–2025)', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Number of 200+ Scores')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [65]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
'2007/08': '2008',
'2009/10': '2010',
'2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'bowling_team', 'venue', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Opponent', 'Venue', 'Season', 'Total_Runs']
highest_each_season = innings_runs.loc[innings_runs.groupby('Season')['Total_Runs'].idxmax()].sort_values(by='Season').reset_index(drop=True)
highest_each_season = highest_each_season.rename(columns={'Total_Runs': 'Highest_Score'})
print(highest_each_season[['Season', 'Team', 'Opponent', 'Venue', 'Highest_Score']])
Season Team Opponent \
0 2008 Chennai Super Kings Kings XI Punjab
1 2009 Rajasthan Royals Kings XI Punjab
2 2010 Chennai Super Kings Rajasthan Royals
3 2011 Kings XI Punjab Royal Challengers Bangalore
4 2012 Chennai Super Kings Delhi Daredevils
5 2013 Royal Challengers Bangalore Pune Warriors
6 2014 Kings XI Punjab Chennai Super Kings
7 2015 Royal Challengers Bangalore Mumbai Indians
8 2016 Royal Challengers Bangalore Gujarat Lions
9 2017 Kings XI Punjab Mumbai Indians
10 2018 Kolkata Knight Riders Kings XI Punjab
11 2019 Kolkata Knight Riders Mumbai Indians
12 2021 Mumbai Indians Sunrisers Hyderabad
13 2022 Rajasthan Royals Delhi Capitals
14 2023 Lucknow Super Giants Punjab Kings
15 2024 Sunrisers Hyderabad Royal Challengers Bengaluru
16 2025 Sunrisers Hyderabad Rajasthan Royals
Venue Highest_Score
0 Punjab Cricket Association Stadium, Mohali 240
1 Kingsmead 211
2 MA Chidambaram Stadium, Chepauk 246
3 Himachal Pradesh Cricket Association Stadium 232
4 MA Chidambaram Stadium, Chepauk 222
5 M Chinnaswamy Stadium 263
6 Barabati Stadium 231
7 Wankhede Stadium 235
8 M Chinnaswamy Stadium 248
9 Wankhede Stadium 230
10 Holkar Cricket Stadium 245
11 Eden Gardens 232
12 Zayed Cricket Stadium, Abu Dhabi 235
13 Wankhede Stadium, Mumbai 222
14 Punjab Cricket Association IS Bindra Stadium, ... 257
15 M Chinnaswamy Stadium, Bengaluru 287
16 Rajiv Gandhi International Stadium, Uppal, Hyd... 286
In [66]:
plt.figure(figsize=(12, 6))
sns.set_style("whitegrid")
sns.lineplot(data=highest_each_season, x='Season', y='Highest_Score', marker='o', linewidth=2.5, color='purple')
plt.title('🔥 Highest IPL Team Scores Per Season (2008–2024)', fontsize=16)
plt.xlabel('Season')
plt.ylabel('Highest Score')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [67]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
df['season'] = df['season'].astype(str)
df['season'] = df['season'].replace({
'2007/08': '2008',
'2009/10': '2010',
'2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']
high_scores = innings_runs[innings_runs['Total_Runs'] >= 200]
team_season_200plus = high_scores.groupby(['Team', 'Season'])['match_id'].nunique().reset_index()
team_season_200plus.columns = ['Team', 'Season', '200+ Scores']
pivot_table = team_season_200plus.pivot(index='Team', columns='Season', values='200+ Scores').fillna(0).astype(int)
pivot_table['Total_200+'] = pivot_table.sum(axis=1)
pivot_table = pivot_table.sort_values(by='Total_200+', ascending=False)
print(pivot_table.drop(columns='Total_200+'))
Season 2008 2009 2010 2011 2012 2013 2014 2015 \ Team Chennai Super Kings 3 0 1 1 2 2 2 1 Mumbai Indians 1 0 2 0 0 1 0 2 Kolkata Knight Riders 2 0 1 0 0 0 1 0 Rajasthan Royals 2 1 2 0 0 0 1 0 Sunrisers Hyderabad 0 0 0 0 0 0 1 1 Royal Challengers Bangalore 0 0 1 1 2 1 0 3 Punjab Kings 0 0 0 0 0 0 0 0 Gujarat Titans 0 0 0 0 0 0 0 0 Delhi Capitals 0 0 0 0 0 0 0 0 Kings XI Punjab 2 0 2 2 0 0 4 1 Lucknow Super Giants 0 0 0 0 0 0 0 0 Royal Challengers Bengaluru 0 0 0 0 0 0 0 0 Delhi Daredevils 0 0 0 1 1 0 0 0 Deccan Chargers 1 0 0 0 0 0 0 0 Gujarat Lions 0 0 0 0 0 0 0 0 Season 2016 2017 2018 2019 2021 2022 2023 2024 \ Team Chennai Super Kings 0 0 4 0 3 4 5 4 Mumbai Indians 1 2 2 0 5 0 6 3 Kolkata Knight Riders 0 0 3 4 2 2 4 6 Rajasthan Royals 0 0 1 0 4 3 4 3 Sunrisers Hyderabad 1 3 1 3 2 0 3 6 Royal Challengers Bangalore 4 1 2 3 2 2 2 0 Punjab Kings 0 0 0 0 1 2 4 3 Gujarat Titans 0 0 0 0 0 0 5 3 Delhi Capitals 0 0 0 1 1 3 1 5 Kings XI Punjab 0 1 1 0 2 0 0 0 Lucknow Super Giants 0 0 0 0 0 2 3 2 Royal Challengers Bengaluru 0 0 0 0 0 0 0 6 Delhi Daredevils 0 2 1 0 0 0 0 0 Deccan Chargers 0 0 0 0 0 0 0 0 Gujarat Lions 0 1 0 0 0 0 0 0 Season 2025 Team Chennai Super Kings 3 Mumbai Indians 6 Kolkata Knight Riders 4 Rajasthan Royals 5 Sunrisers Hyderabad 5 Royal Challengers Bangalore 0 Punjab Kings 8 Gujarat Titans 8 Delhi Capitals 4 Kings XI Punjab 0 Lucknow Super Giants 6 Royal Challengers Bengaluru 4 Delhi Daredevils 0 Deccan Chargers 0 Gujarat Lions 0
In [68]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
df['season'] = df['season'].astype(str).replace({
'2007/08': '2008',
'2009/10': '2010',
'2020/21': '2021'
})
df['season'] = df['season'].str.extract(r'(20\d{2})')
df['season'] = df['season'].astype(int)
innings_runs = df.groupby(['match_id', 'batting_team', 'season'])['runs_total'].sum().reset_index()
innings_runs.columns = ['match_id', 'Team', 'Season', 'Total_Runs']
high_scores = innings_runs[innings_runs['Total_Runs'] >= 200]
team_season_200plus = high_scores.groupby(['Team', 'Season'])['match_id'].nunique().reset_index()
team_season_200plus.columns = ['Team', 'Season', '200+ Scores']
all_seasons = list(range(2008, 2026))
teams = team_season_200plus['Team'].unique()
for team in sorted(teams):
team_data = team_season_200plus[team_season_200plus['Team'] == team]
team_dict = dict(zip(team_data['Season'], team_data['200+ Scores']))
scores_by_year = [team_dict.get(year, 0) for year in all_seasons]
plt.figure(figsize=(10, 4))
sns.lineplot(x=all_seasons, y=scores_by_year, marker='o', color='green', linewidth=2.5)
plt.title(f'📊 200+ Run Innings by {team} (2008–2025)', fontsize=14)
plt.xlabel('Season')
plt.ylabel('No. of 200+ Scores')
plt.xticks(all_seasons, rotation=45)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [69]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_overs = df[(df['over'] >= 16) & (df['over'] <= 20) & (df['valid_ball'] == 1)]
death_runs = death_overs.groupby('batting_team')['runs_total'].sum().reset_index()
death_runs.columns = ['Team', 'Death_Over_Runs']
death_runs = death_runs.sort_values(by='Death_Over_Runs', ascending=False).reset_index(drop=True)
print(death_runs)
Team Death_Over_Runs 0 Mumbai Indians 9710 1 Chennai Super Kings 9143 2 Royal Challengers Bangalore 8085 3 Kolkata Knight Riders 8033 4 Rajasthan Royals 7390 5 Sunrisers Hyderabad 6421 6 Kings XI Punjab 6029 7 Delhi Daredevils 4857 8 Delhi Capitals 3463 9 Punjab Kings 2524 10 Deccan Chargers 2440 11 Gujarat Titans 2262 12 Lucknow Super Giants 2132 13 Pune Warriors 1303 14 Royal Challengers Bengaluru 1027 15 Gujarat Lions 881 16 Rising Pune Supergiant 533 17 Rising Pune Supergiants 427 18 Kochi Tuskers Kerala 324
In [70]:
plt.figure(figsize=(12, 6))
sns.barplot(data=death_runs, x='Death_Over_Runs', y='Team', palette='flare')
plt.title('Total Runs Scored by Teams in Death Overs (16–20)', fontsize=16)
plt.xlabel('Total Runs in Overs 16–20')
plt.ylabel('Team')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [71]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_df = df[(df['over'] >= 16) & (df['over'] <= 20) & (df['valid_ball'] == 1)]
death_summary = death_df.groupby('batting_team').agg(
death_over_runs=('runs_total', 'sum'),
balls_faced=('valid_ball', 'count'),
wickets_lost=('player_out', 'count')
).reset_index()
death_summary['strike_rate'] = (death_summary['death_over_runs'] / death_summary['balls_faced'] * 100).round(2)
death_summary = death_summary.sort_values(by='strike_rate', ascending=False).reset_index(drop=True)
print(death_summary)
batting_team death_over_runs balls_faced wickets_lost \
0 Royal Challengers Bengaluru 1027 556 61
1 Gujarat Titans 2262 1264 115
2 Rising Pune Supergiants 427 245 24
3 Punjab Kings 2524 1464 126
4 Chennai Super Kings 9143 5323 403
5 Lucknow Super Giants 2132 1247 104
6 Royal Challengers Bangalore 8085 4731 418
7 Mumbai Indians 9710 5685 535
8 Kolkata Knight Riders 8033 4942 463
9 Delhi Capitals 3463 2135 187
10 Rising Pune Supergiant 533 331 32
11 Delhi Daredevils 4857 3019 256
12 Sunrisers Hyderabad 6421 4045 392
13 Rajasthan Royals 7390 4682 430
14 Kings XI Punjab 6029 3908 364
15 Deccan Chargers 2440 1600 180
16 Gujarat Lions 881 581 61
17 Kochi Tuskers Kerala 324 230 29
18 Pune Warriors 1303 972 103
strike_rate
0 184.71
1 178.96
2 174.29
3 172.40
4 171.76
5 170.97
6 170.89
7 170.80
8 162.55
9 162.20
10 161.03
11 160.88
12 158.74
13 157.84
14 154.27
15 152.50
16 151.64
17 140.87
18 134.05
In [72]:
plt.figure(figsize=(12, 6))
sns.barplot(data=death_summary, x='strike_rate', y='batting_team', palette='rocket')
plt.title('Death Over Strike Rate (Overs 16–20) by Team', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Team')
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
In [73]:
print("\033[1mPLAYERS ANALYSIS : BATSMAN\033[0m")
PLAYERS ANALYSIS : BATSMAN
In [74]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
pom_df = df.drop_duplicates(subset='match_id')[['match_id', 'player_of_match']]
pom_df = pom_df.dropna()
pom_counts = pom_df['player_of_match'].value_counts().reset_index()
pom_counts.columns = ['Player', 'POM_Awards']
top_10_pom = pom_counts.head(10)
print(top_10_pom)
Player POM_Awards 0 AB de Villiers 25 1 CH Gayle 22 2 RG Sharma 21 3 V Kohli 19 4 MS Dhoni 18 5 DA Warner 18 6 SP Narine 17 7 AD Russell 16 8 SR Watson 16 9 YK Pathan 16
In [75]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_10_pom, x='POM_Awards', y='Player', palette='viridis')
plt.title('Players with Most Player of the Match Awards in IPL', fontsize=16)
plt.xlabel('No. of Awards')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [76]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
top_scorers = df.groupby('batter')['runs_batter'].sum().reset_index()
top_scorers.columns = ['Player', 'Total_Runs']
top_scorers = top_scorers.sort_values(by='Total_Runs', ascending=False).head(10).reset_index(drop=True)
print(top_scorers)
Player Total_Runs 0 V Kohli 8671 1 RG Sharma 7048 2 S Dhawan 6769 3 DA Warner 6567 4 SK Raina 5536 5 MS Dhoni 5439 6 KL Rahul 5235 7 AB de Villiers 5181 8 AM Rahane 5032 9 CH Gayle 4997
In [77]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_scorers, x='Total_Runs', y='Player', palette='coolwarm')
plt.title('Top 10 Run Scorers in IPL History', fontsize=16)
plt.xlabel('Total Runs')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [78]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
sixes_df = df[df['runs_batter'] == 6]
sixes_count = sixes_df.groupby('batter')['runs_batter'].count().reset_index()
sixes_count.columns = ['Player', 'Sixes']
top_six_hitters = sixes_count.sort_values(by='Sixes', ascending=False).head(10).reset_index(drop=True)
print(top_six_hitters)
Player Sixes 0 CH Gayle 359 1 RG Sharma 303 2 V Kohli 292 3 MS Dhoni 264 4 AB de Villiers 253 5 DA Warner 236 6 KA Pollard 224 7 AD Russell 223 8 SV Samson 219 9 KL Rahul 208
In [79]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_six_hitters, x='Sixes', y='Player', palette='magma')
plt.title('Top 10 Players with Most Sixes in IPL History', fontsize=16)
plt.xlabel('Number of Sixes')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [80]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
fours_df = df[df['runs_batter'] == 4]
fours_count = fours_df.groupby('batter')['runs_batter'].count().reset_index()
fours_count.columns = ['Player', 'Fours']
top_four_hitters = fours_count.sort_values(by='Fours', ascending=False).head(10).reset_index(drop=True)
print(top_four_hitters)
Player Fours 0 V Kohli 774 1 S Dhawan 768 2 DA Warner 663 3 RG Sharma 640 4 AM Rahane 515 5 SK Raina 506 6 G Gambhir 492 7 RV Uthappa 481 8 KD Karthik 466 9 SA Yadav 454
In [81]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_four_hitters, x='Fours', y='Player', palette='cubehelix')
plt.title('Top 10 Players with Most Fours in IPL History', fontsize=16)
plt.xlabel('Number of Fours')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [82]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
threes_df = df[df['runs_batter'] == 3]
threes_count = threes_df.groupby('batter')['runs_batter'].count().reset_index()
threes_count.columns = ['Player', 'Threes']
top_three_hitters = threes_count.sort_values(by='Threes', ascending=False).head(10).reset_index(drop=True)
print(top_three_hitters)
Player Threes 0 DA Warner 24 1 S Dhawan 23 2 V Kohli 21 3 AM Rahane 19 4 F du Plessis 18 5 M Vijay 17 6 AB de Villiers 17 7 SV Samson 15 8 RV Uthappa 15 9 MS Dhoni 15
In [83]:
plt.figure(figsize=(10, 6))
sns.barplot(data=top_three_hitters, x='Threes', y='Player', palette='crest')
plt.title('Top 10 Players with Most Threes in IPL History', fontsize=16)
plt.xlabel('Number of Threes')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [84]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
matches_played = df.groupby('batter')['match_id'].nunique().reset_index()
matches_played.columns = ['Player', 'Matches']
total_runs = df.groupby('batter')['runs_batter'].sum().reset_index()
total_runs.columns = ['Player', 'Total_Runs']
outs = df[df['player_out'].notna()]
times_out = outs.groupby('player_out')['match_id'].count().reset_index()
times_out.columns = ['Player', 'Times_Out']
batting_stats = matches_played.merge(total_runs, on='Player', how='inner') \
.merge(times_out, on='Player', how='left')
batting_stats['Times_Out'] = batting_stats['Times_Out'].fillna(0)
batting_stats['Average'] = batting_stats.apply(
lambda row: row['Total_Runs'] / row['Times_Out'] if row['Times_Out'] > 0 else None,
axis=1
)
batting_stats_25plus = batting_stats[batting_stats['Matches'] >= 25]
batting_stats_25plus = batting_stats_25plus.sort_values(by='Average', ascending=False).reset_index(drop=True)
print(batting_stats_25plus[['Player', 'Matches', 'Total_Runs', 'Times_Out', 'Average']].head(20))
Player Matches Total_Runs Times_Out Average 0 B Sai Sudharsan 40 1793 36.0 49.805556 1 KL Rahul 135 5235 115.0 45.521739 2 DP Conway 28 1080 25.0 43.200000 3 T Stubbs 30 711 17.0 41.823529 4 C Green 28 707 17.0 41.588235 5 Shashank Singh 33 773 19.0 40.684211 6 RD Gaikwad 70 2502 62.0 40.354839 7 DA Warner 184 6567 164.0 40.042683 8 H Klaasen 45 1480 37.0 40.000000 9 LMP Simmons 29 1079 27.0 39.962963 10 AB de Villiers 170 5181 130.0 39.853846 11 JP Duminy 75 2029 51.0 39.784314 12 CH Gayle 141 4997 126.0 39.658730 13 JC Buttler 119 4121 104.0 39.625000 14 V Kohli 259 8671 219.0 39.593607 15 SE Marsh 69 2489 63.0 39.507937 16 Shubman Gill 114 3866 98.0 39.448980 17 MEK Hussey 58 1977 51.0 38.764706 18 MS Dhoni 241 5439 142.0 38.302817 19 Tilak Varma 51 1499 40.0 37.475000
In [85]:
top_avg = batting_stats_25plus.sort_values(by='Average', ascending=False).head(20)
plt.figure(figsize=(12, 6))
sns.barplot(data=top_avg, x='Average', y='Player', palette='plasma')
plt.title('Top 20 IPL Batting Averages (Min 25 Matches)', fontsize=16)
plt.xlabel('Batting Average')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [86]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_overs_df = df[(df['over'] >= 16) & (df['over'] <= 20)]
death_batting = death_overs_df.groupby('batter').agg({
'runs_batter': 'sum',
'balls_faced': 'sum'
}).reset_index()
death_batting = death_batting[death_batting['balls_faced'] > 0]
death_batting['Strike_Rate'] = (death_batting['runs_batter'] / death_batting['balls_faced']) * 100
qualified_finishers = death_batting[death_batting['balls_faced'] >= 100]
top_finishers = qualified_finishers.sort_values(by='Strike_Rate', ascending=False).head(10).reset_index(drop=True)
print(top_finishers[['batter', 'runs_batter', 'balls_faced', 'Strike_Rate']])
batter runs_batter balls_faced Strike_Rate 0 T Stubbs 370 152 243.421053 1 AB de Villiers 1421 611 232.569558 2 LS Livingstone 241 107 225.233645 3 Naman Dhir 222 103 215.533981 4 Shashank Singh 360 172 209.302326 5 H Klaasen 434 209 207.655502 6 CH Gayle 404 196 206.122449 7 AD Russell 1134 552 205.434783 8 RR Pant 696 339 205.309735 9 TH David 578 283 204.240283
In [87]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_finishers, x='Strike_Rate', y='batter', palette='rocket')
plt.title('Top 10 Batsmen by Strike Rate in Death Overs (Min 100 Balls)', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [88]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
powerplay_df = df[(df['over'] >= 1) & (df['over'] <= 6)]
pp_batting = powerplay_df.groupby('batter').agg({
'runs_batter': 'sum',
'balls_faced': 'sum'
}).reset_index()
pp_batting = pp_batting[pp_batting['balls_faced'] > 0]
pp_batting['Strike_Rate'] = (pp_batting['runs_batter'] / pp_batting['balls_faced']) * 100
qualified_openers = pp_batting[pp_batting['balls_faced'] >= 100]
top_pp_hitters = qualified_openers.sort_values(by='Strike_Rate', ascending=False).head(10).reset_index(drop=True)
print(top_pp_hitters[['batter', 'runs_batter', 'balls_faced', 'Strike_Rate']])
batter runs_batter balls_faced Strike_Rate 0 J Fraser-McGurk 266 108 246.296296 1 Priyansh Arya 318 164 193.902439 2 TM Head 632 330 191.515152 3 PD Salt 641 343 186.880466 4 Abhishek Sharma 1026 563 182.238011 5 SP Narine 1031 584 176.541096 6 N Pooran 263 155 169.677419 7 YBK Jaiswal 1183 738 160.298103 8 RD Rickelton 268 168 159.523810 9 A Raghuvanshi 169 108 156.481481
In [89]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_pp_hitters, x='Strike_Rate', y='batter', palette='coolwarm')
plt.title('Top 10 Batsmen by Strike Rate in Powerplay (Overs 1–6)', fontsize=16)
plt.xlabel('Strike Rate')
plt.ylabel('Player')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [90]:
centuries = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum', 'balls_faced': 'sum'}).reset_index()
centuries = centuries[centuries['runs_batter'] >= 100]
fastest_centuries = centuries.sort_values(by='balls_faced').head(10)
fastest_centuries = fastest_centuries.rename(columns={'runs_batter': 'Runs', 'balls_faced': 'Balls'})
print(fastest_centuries)
match_id batter Runs Balls 1786 419107 YK Pathan 100 37 5840 598064 DA Miller 101 38 17231 1473484 V Suryavanshi 101 38 17524 1473505 H Klaasen 105 39 16085 1426283 WG Jacks 100 41 15856 1426268 TM Head 102 41 16864 1473459 Priyansh Arya 103 42 15310 1359543 C Green 100 47 16535 1473439 Ishan Kishan 106 47 184 335994 AC Gilchrist 109 47
In [91]:
fifties = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum', 'balls_faced': 'sum'}).reset_index()
fifties = fifties[(fifties['runs_batter'] >= 50) & (fifties['runs_batter'] < 100)]
fastest_fifties = fifties.sort_values(by='balls_faced').head(10)
fastest_fifties = fastest_fifties.rename(columns={'runs_batter': 'Runs', 'balls_faced': 'Balls'})
print(fastest_fifties)
match_id batter Runs Balls 17310 1473489 R Shepherd 53 14 13288 1304060 PJ Cummins 56 15 9538 1136562 KL Rahul 51 16 8076 980947 KA Pollard 51 17 9297 1082636 SP Narine 54 17 15933 1426273 J Fraser-McGurk 65 18 9559 1136563 SP Narine 50 19 15782 1426263 SA Yadav 52 19 6082 729291 DA Miller 51 19 14469 1359489 N Pooran 62 19
In [92]:
high_scores = df.groupby(['match_id', 'batter']).agg({'runs_batter': 'sum'}).reset_index()
high_scores = high_scores.sort_values(by='runs_batter', ascending=False).head(10)
high_scores = high_scores.rename(columns={'runs_batter': 'Total_Runs'})
print(high_scores)
match_id batter Total_Runs 5302 598027 CH Gayle 175 2 335982 BB McCullum 158 16934 1473464 Abhishek Sharma 141 14108 1304112 Q de Kock 140 7528 829795 AB de Villiers 133 11583 1216510 KL Rahul 132 15383 1370352 Shubman Gill 129 8359 980987 AB de Villiers 129 10149 1136602 RR Pant 128 4687 548372 CH Gayle 128
In [93]:
plt.figure(figsize=(10, 5))
sns.barplot(data=high_scores, x='batter', y='Total_Runs', palette='plasma')
plt.title('Top 10 Highest Individual Scores in an IPL Match', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [94]:
print("\033[1mPLAYERS ANALYSIS : BOWLERS\033[0m")
PLAYERS ANALYSIS : BOWLERS
In [95]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_wickets = df[df['bowler_wicket'] == 1]
wicket_count = valid_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
wicket_count.columns = ['Bowler', 'Wickets']
top_wicket_takers = wicket_count.sort_values(by='Wickets', ascending=False).head(10).reset_index(drop=True)
print(top_wicket_takers)
Bowler Wickets 0 YS Chahal 221 1 B Kumar 198 2 SP Narine 192 3 PP Chawla 192 4 R Ashwin 187 5 JJ Bumrah 186 6 DJ Bravo 183 7 A Mishra 174 8 SL Malinga 170 9 RA Jadeja 170
In [96]:
import matplotlib.pyplot as plt
import seaborn as sns
plt.figure(figsize=(12, 6))
sns.barplot(data=top_wicket_takers, x='Bowler', y='Wickets', palette='mako')
plt.title('Top 10 Wicket-Takers in IPL History', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Total Wickets')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [97]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_balls = df[df['valid_ball'] == 1]
economy_stats = valid_balls.groupby('bowler').agg({
'runs_bowler': 'sum',
'valid_ball': 'count'
}).reset_index()
economy_stats.columns = ['Bowler', 'Runs_Conceded', 'Balls_Bowled']
economy_stats = economy_stats[economy_stats['Balls_Bowled'] >= 250]
economy_stats['Overs'] = economy_stats['Balls_Bowled'] / 6
economy_stats['Economy'] = (economy_stats['Runs_Conceded'] / economy_stats['Overs']).round(2)
top_economical = economy_stats.sort_values(by='Economy').head(15).reset_index(drop=True)
print(top_economical[['Bowler', 'Overs', 'Runs_Conceded', 'Economy']])
Bowler Overs Runs_Conceded Economy 0 A Kumble 160.833333 1019 6.34 1 M Muralitharan 254.666667 1641 6.44 2 SM Pollock 46.000000 297 6.46 3 GD McGrath 54.000000 351 6.50 4 RE van der Merwe 73.833333 486 6.58 5 DW Steyn 363.666667 2393 6.58 6 SP Narine 725.166667 4835 6.67 7 R Rampaul 44.666667 298 6.67 8 J Yadav 65.000000 436 6.71 9 DL Vettori 129.500000 871 6.73 10 SL Malinga 471.166667 3176 6.74 11 J Botha 115.666667 781 6.75 12 DP Nannes 107.666667 732 6.80 13 AD Mascarenhas 51.333333 353 6.88 14 DE Bollinger 96.000000 661 6.89
In [98]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_economical, x='Bowler', y='Economy', palette='crest_r')
plt.title('Top 15 Most Economical Bowlers in IPL History', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Economy Rate')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
In [99]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
valid_balls = df[df['valid_ball'] == 1]
expensive_stats = valid_balls.groupby('bowler').agg({
'runs_bowler': 'sum',
'valid_ball': 'count'
}).reset_index()
expensive_stats.columns = ['Bowler', 'Runs_Conceded', 'Balls_Bowled']
expensive_stats = expensive_stats[expensive_stats['Balls_Bowled'] >= 250]
expensive_stats['Overs'] = expensive_stats['Balls_Bowled'] / 6
expensive_stats['Economy'] = (expensive_stats['Runs_Conceded'] / expensive_stats['Overs']).round(2)
top_expensive = expensive_stats.sort_values(by='Economy', ascending=False).head(15).reset_index(drop=True)
print(top_expensive[['Bowler', 'Overs', 'Runs_Conceded', 'Economy']])
Bowler Overs Runs_Conceded Economy 0 Akash Deep 46.333333 513 11.07 1 Arshad Khan 43.333333 464 10.71 2 CJ Anderson 49.500000 497 10.04 3 Fazalhaq Farooqi 42.333333 425 10.04 4 Yash Thakur 73.833333 736 9.97 5 Mukesh Kumar 106.000000 1056 9.96 6 G Coetzee 45.500000 445 9.78 7 Vijaykumar Vyshak 55.500000 542 9.77 8 Basil Thampi 86.833333 828 9.54 9 Mukesh Choudhary 52.500000 497 9.47 10 MP Stoinis 148.000000 1397 9.44 11 SM Curran 208.833333 1969 9.43 12 Kartik Tyagi 70.333333 662 9.41 13 Azmatullah Omarzai 48.000000 451 9.40 14 R Parag 49.166667 461 9.38
In [100]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_expensive, x='Bowler', y='Economy', palette='flare')
plt.title('Top 15 Most Expensive Bowlers in IPL (by Economy)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Economy Rate')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
In [101]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
death_df = df[(df['over'] >= 16) & (df['over'] <= 20)]
death_wickets = death_df[death_df['bowler_wicket'] == 1]
death_wicket_stats = death_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
death_wicket_stats.columns = ['Bowler', 'Wickets']
top_15_death_bowlers = death_wicket_stats.sort_values(by='Wickets', ascending=False).head(15).reset_index(drop=True)
print(top_15_death_bowlers)
Bowler Wickets 0 DJ Bravo 102 1 B Kumar 92 2 SL Malinga 90 3 JJ Bumrah 86 4 HV Patel 70 5 MM Sharma 61 6 SP Narine 60 7 Mohammed Shami 58 8 CH Morris 55 9 K Rabada 53 10 Sandeep Sharma 53 11 TA Boult 52 12 JD Unadkat 50 13 Arshdeep Singh 47 14 AD Russell 47
In [102]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_15_death_bowlers, x='Bowler', y='Wickets', palette='inferno')
plt.title('Top 15 Bowlers with Most Wickets in Death Overs (16–20)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Wickets in Death Overs')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [103]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
powerplay_df = df[(df['over'] >= 1) & (df['over'] <= 6)]
powerplay_wickets = powerplay_df[powerplay_df['bowler_wicket'] == 1]
powerplay_wicket_stats = powerplay_wickets.groupby('bowler')['bowler_wicket'].sum().reset_index()
powerplay_wicket_stats.columns = ['Bowler', 'Wickets']
top_15_powerplay_bowlers = powerplay_wicket_stats.sort_values(by='Wickets', ascending=False).head(15).reset_index(drop=True)
print(top_15_powerplay_bowlers)
Bowler Wickets 0 I Sharma 57 1 B Kumar 53 2 DL Chahar 53 3 Sandeep Sharma 51 4 UT Yadav 49 5 R Ashwin 45 6 Z Khan 41 7 Mohammed Shami 41 8 TA Boult 40 9 DS Kulkarni 39 10 Harbhajan Singh 38 11 JJ Bumrah 38 12 MM Sharma 36 13 RP Singh 36 14 M Morkel 36
In [104]:
plt.figure(figsize=(12, 6))
sns.barplot(data=top_15_powerplay_bowlers, x='Bowler', y='Wickets', palette='magma')
plt.title('Top 15 Bowlers with Most Wickets in Powerplay (Overs 1–6)', fontsize=16)
plt.xlabel('Bowler')
plt.ylabel('Wickets in Powerplay')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [105]:
print("\033[1mBowlers Vs Batsman\033[0m")
Bowlers Vs Batsman
In [106]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
kohli_dismissals = df[df['player_out'] == 'V Kohli']
kohli_out_by_bowler = kohli_dismissals.groupby('bowler')['player_out'].count().reset_index()
kohli_out_by_bowler.columns = ['Bowler', 'Dismissals']
top5_kohli_out = kohli_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)
print(top5_kohli_out)
Bowler Dismissals 0 Sandeep Sharma 7 1 A Nehra 6 2 Mohammed Shami 5 3 JJ Bumrah 5 4 DS Kulkarni 4
In [107]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_kohli_out, x='Bowler', y='Dismissals', palette='rocket')
plt.title('Top 5 Bowlers Who Dismissed Virat Kohli the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [108]:
rohit_dismissals = df[df['player_out'] == 'RG Sharma']
rohit_out_by_bowler = rohit_dismissals.groupby('bowler')['player_out'].count().reset_index()
rohit_out_by_bowler.columns = ['Bowler', 'Dismissals']
top5_rohit_out = rohit_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)
print("🔹 Top 5 Bowlers Who Dismissed Rohit Sharma:")
print(top5_rohit_out)
🔹 Top 5 Bowlers Who Dismissed Rohit Sharma:
Bowler Dismissals
0 SP Narine 8
1 A Mishra 7
2 R Vinay Kumar 6
3 DJ Bravo 5
4 SK Trivedi 5
In [109]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_rohit_out, x='Bowler', y='Dismissals', palette='coolwarm')
plt.title('Top 5 Bowlers Who Dismissed Rohit Sharma the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [110]:
dhoni_dismissals = df[df['player_out'] == 'MS Dhoni']
dhoni_out_by_bowler = dhoni_dismissals.groupby('bowler')['player_out'].count().reset_index()
dhoni_out_by_bowler.columns = ['Bowler', 'Dismissals']
top5_dhoni_out = dhoni_out_by_bowler.sort_values(by='Dismissals', ascending=False).head(5).reset_index(drop=True)
print("🔸 Top 5 Bowlers Who Dismissed MS Dhoni:")
print(top5_dhoni_out)
🔸 Top 5 Bowlers Who Dismissed MS Dhoni:
Bowler Dismissals
0 Z Khan 7
1 PP Ojha 7
2 JJ Bumrah 4
3 HV Patel 4
4 YS Chahal 4
In [111]:
plt.figure(figsize=(8, 5))
sns.barplot(data=top5_dhoni_out, x='Bowler', y='Dismissals', palette='plasma')
plt.title('Top 5 Bowlers Who Dismissed MS Dhoni the Most', fontsize=14)
plt.xlabel('Bowler')
plt.ylabel('Number of Dismissals')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [112]:
df = pd.read_csv("IPL.csv")
df.columns = df.columns.str.strip()
bumrah_df = df[df['bowler'] == 'JJ Bumrah']
runs_vs_bumrah = bumrah_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_bumrah.columns = ['Batter', 'Runs_Against_Bumrah']
top10_vs_bumrah = runs_vs_bumrah.sort_values(by='Runs_Against_Bumrah', ascending=False).head(10).reset_index(drop=True)
print(top10_vs_bumrah)
Batter Runs_Against_Bumrah 0 V Kohli 155 1 KL Rahul 150 2 AB de Villiers 131 3 S Dhawan 105 4 MK Pandey 81 5 SS Iyer 74 6 JP Duminy 70 7 JC Buttler 69 8 KD Karthik 68 9 SPD Smith 68
In [113]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_bumrah, x='Batter', y='Runs_Against_Bumrah', palette='viridis')
plt.title('Top 10 Batsmen Scoring Most Runs vs Jasprit Bumrah', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [114]:
shami_df = df[df['bowler'] == 'Mohammed Shami']
runs_vs_shami = shami_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_shami.columns = ['Batter', 'Runs_Against_Sham']
top10_vs_shami = runs_vs_shami.sort_values(by='Runs_Against_Sham', ascending=False).head(10).reset_index(drop=True)
print(top10_vs_shami)
Batter Runs_Against_Sham 0 F du Plessis 118 1 S Dhawan 113 2 DA Warner 109 3 V Kohli 107 4 AD Russell 99 5 AM Rahane 94 6 JC Buttler 86 7 RG Sharma 84 8 SV Samson 82 9 Shubman Gill 76
In [115]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_shami, x='Batter', y='Runs_Against_Sham', palette='cool')
plt.title('Top 10 Batsmen Scoring Most Runs vs Mohammed Shami', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [116]:
bhuvi_df = df[df['bowler'] == 'B Kumar']
runs_vs_bhuvi = bhuvi_df.groupby('batter')['runs_batter'].sum().reset_index()
runs_vs_bhuvi.columns = ['Batter', 'Runs_Against_Bhuvi']
top10_vs_bhuvi = runs_vs_bhuvi.sort_values(by='Runs_Against_Bhuvi', ascending=False).head(10).reset_index(drop=True)
print(top10_vs_bhuvi)
Batter Runs_Against_Bhuvi 0 CH Gayle 131 1 V Kohli 129 2 SV Samson 120 3 RR Pant 120 4 F du Plessis 109 5 KL Rahul 109 6 AM Rahane 104 7 MS Dhoni 102 8 YBK Jaiswal 101 9 AD Russell 97
In [117]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top10_vs_bhuvi, x='Batter', y='Runs_Against_Bhuvi', palette='crest')
plt.title('Top 10 Batsmen Scoring Most Runs vs Bhuvneshwar Kumar', fontsize=14)
plt.xlabel('Batsman')
plt.ylabel('Total Runs Scored')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [118]:
dismissal_counts = df['wicket_kind'].value_counts().reset_index()
dismissal_counts.columns = ['Dismissal_Type', 'Count']
print(dismissal_counts.head(10))
Dismissal_Type Count 0 caught 8665 1 bowled 2345 2 run out 1153 3 lbw 853 4 caught and bowled 388 5 stumped 376 6 hit wicket 18 7 retired hurt 17 8 retired out 5 9 obstructing the field 3
In [119]:
plt.figure(figsize=(10, 6))
sns.barplot(data=dismissal_counts.head(10), x='Count', y='Dismissal_Type', palette='rocket')
plt.title('Most Common Dismissal Types in IPL', fontsize=14)
plt.xlabel('Count')
plt.ylabel('Dismissal Type')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [120]:
catch_df = df[df['wicket_kind'] == 'caught']
catch_df = catch_df[catch_df['fielders'].notna()]
catch_counts = catch_df['fielders'].value_counts().reset_index()
catch_counts.columns = ['Fielder', 'Catches']
top_catch_fielders = catch_counts.head(10)
print(top_catch_fielders)
Fielder Catches 0 MS Dhoni 158 1 KD Karthik 145 2 AB de Villiers 120 3 V Kohli 117 4 SK Raina 106 5 RA Jadeja 103 6 RG Sharma 101 7 S Dhawan 100 8 KA Pollard 97 9 WP Saha 93
In [121]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top_catch_fielders, x='Catches', y='Fielder', palette='viridis')
plt.title('Top 10 Fielders with Most Catches in IPL', fontsize=14)
plt.xlabel('Total Catches')
plt.ylabel('Fielder')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [122]:
all_fielding = df[df['fielders'].notna()]
fielder_total_dismissals = all_fielding['fielders'].value_counts().reset_index()
fielder_total_dismissals.columns = ['Fielder', 'Total_Fielding_Dismissals']
top_fielders = fielder_total_dismissals.head(10)
print(top_fielders)
Fielder Total_Fielding_Dismissals 0 MS Dhoni 221 1 KD Karthik 191 2 AB de Villiers 134 3 RV Uthappa 128 4 V Kohli 126 5 WP Saha 121 6 SV Samson 113 7 SK Raina 112 8 RA Jadeja 109 9 RR Pant 106
In [123]:
plt.figure(figsize=(10, 5))
sns.barplot(data=top_fielders, x='Total_Fielding_Dismissals', y='Fielder', palette='magma')
plt.title('Best Fielders in IPL (Total Dismissals)', fontsize=14)
plt.xlabel('Total Fielding Contributions')
plt.ylabel('Fielder')
plt.grid(axis='x', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [ ]: